This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.

#Load some useful libraries
library(limma)
library(DESeq2)
library(edgeR)
library(data.table)
library(tidyr)
library(magrittr)
library(tidyverse)
library(sjlabelled)
library(sjmisc)
library(ggplot2)
library(compositions)
library(broom)
library(DataCombine)
library(ggsignif)
library(ggpubr)
#import data
countdf <- read.csv(file = 'labs.csv')
rownames(countdf) <- NULL
meta <- read.csv(file = 'labs_metadata.csv')
rownames(meta) <- NULL
full <- read.csv(file = 'labs_full.csv')
rownames(full) <- NULL

#clean-up tables
#prepare for model
#make consistent index/column/label/ordering between tables/matrices
names(countdf)[1] <- 'name'
countdf[countdf==0] <- 0.0000001

# remove added X's from PCI names
names(countdf) <- sapply(str_remove_all(names(countdf),"X"),"[") 
count_matrix <- countdf
count_matrix <- count_matrix[,2:ncol(count_matrix)]
count_matrix <- as.matrix(count_matrix)
names(full) <- sapply(str_remove_all(colnames(full),"X"),"[")
names(count_matrix) <- sapply(str_remove_all(colnames(count_matrix),"X"),"[")

#factorize sex
meta$sex <- factor(meta$sex)
full$sex <- factor(full$sex)

#factorize BMF
meta$bowel <- factor(as.numeric(factor(meta$bowel, levels = c(1,2,3,4), labels = c(1,2,3,4))))
full$bowel <- factor(as.numeric(factor(full$bowel, levels = c(1,2,3,4), labels = c(1,2,3,4))))

#set reference category to be "High Normal" BMF (level = 3)
meta <- within(meta, bowel <- relevel(bowel, ref = 3))
full <- within(full, bowel <- relevel(bowel, ref = 3))

as.data.frame(count_matrix)
countdf <- DropNA(countdf)
No Var specified. Dropping all NAs from the data frame.

0 rows dropped from the data frame because of missing values.
meta <- DropNA(meta)
No Var specified. Dropping all NAs from the data frame.

0 rows dropped from the data frame because of missing values.
full <- DropNA(full)
No Var specified. Dropping all NAs from the data frame.

0 rows dropped from the data frame because of missing values.
countdf
meta
full
#design GLM using lmFit and eBayes
#Algorithm provided by and adapted from Christian Diener, PhD:
###########################################################################################################
design <- model.matrix(~bowel + sex + age + BMI_CALC + eGFR, full)
dge <- DGEList(counts=count_matrix)  # where `count_matrix` is the matrix mentioned above
logCPM <- cpm(dge, log=TRUE)  # takes the log of the data
fit <- lmFit(logCPM, design)  # fits the model for all labs
fit <- eBayes(fit)  # stabilizes the variances
###########################################################################################################
#Pre-process for plotting:

#Get results table for Constipation coefficient relative to High Normal BMF:
re_const <- topTable(fit, coef = 2, genelist = countdf$name, sort="p", number="none")  # Select the significant models by coefficient 2
re_low <- topTable(fit, coef = 3, genelist = countdf$name, sort="p", number="none")  # Select the significant models by coefficient 3
re_diarrhea <- topTable(fit, coef = 4, genelist = countdf$name, sort="p", number="none")  # Select the significant models by coefficient 4

indices <- match(rownames(re_const), rownames(meta)) # associate column of labs names with index of lab
re_const[1] <- countdf$name[indices] # associate the labs names with the labs indices
p_const <- re_const[re_const$adj.P.Val < 0.05,] # create df of just significant adj P value results
p_const <- p_const[order(p_const$adj.P.Val),] # order by adj P value
p_const <- p_const[,c('logFC','B','adj.P.Val','ID','P.Value')] # keep only desired columns

#Get results table for Low Normal coefficient relative to High Normal BMF:
indices <- match(rownames(re_low), rownames(meta)) # associate column of labs names with index of lab
re_low[1] <- countdf$name[indices] # associate the labs names with the labs indices
p_low <- re_low[re_low$adj.P.Val < 0.05,] # create df of just significant adj P value results
p_low <- p_low[order(p_low$adj.P.Val),] # order by adj P value
p_low <- p_low[,c('logFC','B','adj.P.Val','ID','P.Value')] # keep only desired columns

#Get results table for Diarrhea coefficient relative to High Normal BMF:
indices <- match(rownames(re_diarrhea), rownames(meta)) # associate column of labs names with index of lab
re_diarrhea[1] <- countdf$name[indices] # associate the labs names with the labs indices
p_diarrhea <- re_diarrhea[re_diarrhea$adj.P.Val < 0.05,] # create df of just significant adj P value results
p_diarrhea <- p_diarrhea[order(p_diarrhea$adj.P.Val),] # order by adj P value
p_diarrhea <- p_diarrhea[,c('logFC','B','adj.P.Val','ID','P.Value')] # keep only desired columns



#Show dfs of significant hits (there are none)
sig_const <- p_const[which(p_const$adj.P.Val < 0.05),]
sig_low <- p_low[which(p_low$adj.P.Val < 0.05),]
sig_diarrhea <- p_diarrhea[which(p_diarrhea$adj.P.Val < 0.05),]
sig_low$bowel <- 'Low Normal'
sig_low
sig_diarrhea
sig_const
#Main Text Figures
comparisons = list(c("Constipation","High Normal"),c("Low Normal","High Normal"),c("Diarrhea","High Normal"))
titles = list(c('OMEGA-6/OMEGA-3 RATIO'),c('EPA'),c('HOMOCYSTEINE, SERUM'),c('EOSINOPHILS'), c('BASOPHILS'))

myplots_main <- list()  # new empty list
#annotated only
labs_testanno <- cbind(full[,1],full[,6],as.data.frame(clr(full[,7:ncol(full)][,as.numeric(rownames(sig_low))])))
names(labs_testanno)[1] <- 'public_client_id'
names(labs_testanno)[2] <- 'bowel'
labs_testanno$bowel <- factor(labs_testanno$bowel, levels=c(1,2,3,4), labels = c("Constipation","Low Normal","High Normal","Diarrhea"))
labs_testanno$bowel <- factor(labs_testanno$bowel, levels=c("Constipation","Low Normal","High Normal","Diarrhea"), labels = c("Constipation","Low Normal","High Normal","Diarrhea"))

#Plotting:
#######################################################################
counter<<-1

# Annotation function:
sig = function(x){
  if(x < 0.001){"***"} 
  else if(x < 0.01){"**"}
  else if(x < 0.05){"*"}
  else{NA}}

# Special Graphing Function:
########################################
test = function(x,y,z,j) {
  x = NULL
  y = NULL
  
  temp<- names(labs_testanno)[[j+2]]
  temp_size <- merge(labs_testanno[,c(1,2,j+2)],labs_testanno[,c('public_client_id','bowel')])
  temp_size <- temp_size[,c('bowel',temp)]
  temp_size <- temp_size[which(!is.na(temp_size)),]
  temp_size <- length(levels(factor(temp_size$bowel)))
  
  if (counter==1) {
    results = ifelse(nrow(sig_low[which(sig_low['bowel'] == 'Constipation'),]['adj.P.Val'])!=0,
           list(p.value = sig_low[which(sig_low['bowel'] == 'Constipation'),]['adj.P.Val'][[1]]),
           list(p.value = 1))
  }
  else if (counter==2) {
    results = ifelse(nrow(sig_low[which(sig_low['bowel'] == 'Low Normal'),]['adj.P.Val'])!=0,
           list(p.value = sig_low[which(sig_low['bowel'] == 'Low Normal'),]['adj.P.Val'][[1]]),
           list(p.value = 1))
  }
  else {
     results = ifelse(nrow(sig_low[which(sig_low['bowel'] == 'Diarrhea'),]['adj.P.Val'])!=0,
           list(p.value = sig_low[which(sig_low['bowel'] == 'Diarrhea'),]['adj.P.Val'][[1]]),
           list(p.value = 1))
  }
  
  if (temp_size == 2) { #if the # of levels to the omic is 2 categories (incl High Normal)
    counter<<-1
    }
  
  if (temp_size == 3) { #if the # of levels to the omic is 3 categories (incl High Normal)
    counter<<-counter+1
    if (counter > 2) {counter<<-1}
  }
  
  if (temp_size == 4) { #if the # of levels to the omic is all 4 categories (incl High Normal)
    counter<<-counter+1
    if (counter > 3) {counter<<-1}
  }
  
  names(results) <- 'p.value'
  return(results)
}
##########################################

#for every significant hit, plot the graph for it and annotate it properly using the above function:
for (ind in 1:nrow(sig_low)) {
  y_name <- paste(names(labs_testanno)[c(ind+2)],sep="")
  myplots_main[[ind]] <- local({
    plotlim_loweranno = min(labs_testanno[[y_name]])
    plotlim_upperanno = max(labs_testanno[[y_name]])
    plotlim_baranno = plotlim_loweranno - 3.5
    plotlim_marginanno = plotlim_loweranno - 5
    labs_testanno <- labs_testanno[,c('bowel','public_client_id',y_name)]
    plt_anno <- ggplot(data = labs_testanno, aes(x = bowel, y = .data[[y_name]], group = bowel)) +
    scale_x_discrete(guide = guide_axis(n.dodge = 2))+
    geom_jitter(aes(color = bowel),  size = 0.1, cex = 0.05) +
    geom_boxplot(alpha=0.0,outlier.shape = NA) +
    theme(text = element_text(size = 9)) +
    ggtitle(label = str_wrap(titles[[ind]], width = 2)) +
    geom_signif(comparisons = comparisons, map_signif_level = sig, test = 'test', test.args = list(z = comparisons, j = ind), 
                y_position = plotlim_baranno, 
                step_increase = 0.15,  size = 0.5, 
                textsize = 1.5,
                tip_length = c(0,0)) +
    coord_cartesian(ylim=c(plotlim_loweranno,plotlim_upperanno),clip="off")+
    labs(color = "BMF Category", y = ifelse(ind == 1, "Log-Transformed\n Clinical Labs Level",""))+
    guides(colour = guide_legend(override.aes = list(size=7), title.position = 'left', nrow = 1, ncol = 4)) +
      
      
    theme(plot.margin = unit(c(0,0,abs(plotlim_marginanno),0), "cm"),
          legend.title = element_text(size=10), 
          legend.text = element_text(size=7),
          axis.text.x = element_blank(), 
          axis.title.x = element_blank()) +
      
    scale_fill_manual(limits = c("Constipation","Low Normal","High Normal","Diarrhea"), labels = c("Constipation","Low Normal","High Normal","Diarrhea"), values = colors(),
                    drop = FALSE)
  })
}
Warning: Duplicated aesthetics after name standardisation: size
Warning: Duplicated aesthetics after name standardisation: size
Warning: Duplicated aesthetics after name standardisation: size
Warning: Duplicated aesthetics after name standardisation: size
Warning: Duplicated aesthetics after name standardisation: size
#store the figures to save them:
counter <<- 1
figure_main <- ggarrange(plotlist = myplots_main[1:length(myplots_main)], labels = LETTERS[1:5], legend = "top", align = "hv", common.legend = TRUE, nrow = 1, ncol = 5)
Warning in if (x < 0.001) { :
  the condition has length > 1 and only the first element will be used
Warning: Removed 8 rows containing missing values (geom_signif).
Warning in if (x < 0.001) { :
  the condition has length > 1 and only the first element will be used
Warning: Removed 8 rows containing missing values (geom_signif).
Warning in if (x < 0.001) { :
  the condition has length > 1 and only the first element will be used
Warning: Removed 8 rows containing missing values (geom_signif).
Warning in if (x < 0.001) { :
  the condition has length > 1 and only the first element will be used
Warning: Removed 8 rows containing missing values (geom_signif).
Warning in if (x < 0.001) { :
  the condition has length > 1 and only the first element will be used
Warning: Removed 8 rows containing missing values (geom_signif).
Warning in if (x < 0.001) { :
  the condition has length > 1 and only the first element will be used
Warning: Removed 8 rows containing missing values (geom_signif).
#######################################################################
figure_main


#save the figures:
ggsave(
  "BMFvsLabsMain.png",
  plot = figure_main,
  device = NULL,
  path = NULL,
  scale = 1.2,
  width = NA,
  height = NA,
  units = c("in", "cm", "mm", "px"),
  dpi = 300,
  limitsize = TRUE,
  bg = NULL
)
Saving 8.24 x 5.09 in image

Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).

The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.

---
title: "James P. Johnson - Clinical Blood Chemistries LIMMA - v3-6-23"
output: html_notebook
---

This is an [R Markdown](http://rmarkdown.rstudio.com) Notebook. When you execute code within the notebook, the results appear beneath the code. 

Try executing this chunk by clicking the *Run* button within the chunk or by placing your cursor inside it and pressing *Ctrl+Shift+Enter*. 

```{r}
#Load some useful libraries
library(limma)
library(DESeq2)
library(edgeR)
library(data.table)
library(tidyr)
library(magrittr)
library(tidyverse)
library(sjlabelled)
library(sjmisc)
library(ggplot2)
library(compositions)
library(broom)
library(DataCombine)
library(ggsignif)
library(ggpubr)
```


```{r}
#import data
countdf <- read.csv(file = 'labs.csv')
rownames(countdf) <- NULL
meta <- read.csv(file = 'labs_metadata.csv')
rownames(meta) <- NULL
full <- read.csv(file = 'labs_full.csv')
rownames(full) <- NULL

#clean-up tables
#prepare for model
#make consistent index/column/label/ordering between tables/matrices
names(countdf)[1] <- 'name'
countdf[countdf==0] <- 0.0000001

# remove added X's from PCI names
names(countdf) <- sapply(str_remove_all(names(countdf),"X"),"[") 
count_matrix <- countdf
count_matrix <- count_matrix[,2:ncol(count_matrix)]
count_matrix <- as.matrix(count_matrix)
names(full) <- sapply(str_remove_all(colnames(full),"X"),"[")
names(count_matrix) <- sapply(str_remove_all(colnames(count_matrix),"X"),"[")

#factorize sex
meta$sex <- factor(meta$sex)
full$sex <- factor(full$sex)

#factorize BMF
meta$bowel <- factor(as.numeric(factor(meta$bowel, levels = c(1,2,3,4), labels = c(1,2,3,4))))
full$bowel <- factor(as.numeric(factor(full$bowel, levels = c(1,2,3,4), labels = c(1,2,3,4))))

#set reference category to be "High Normal" BMF (level = 3)
meta <- within(meta, bowel <- relevel(bowel, ref = 3))
full <- within(full, bowel <- relevel(bowel, ref = 3))

as.data.frame(count_matrix)
countdf <- DropNA(countdf)
meta <- DropNA(meta)
full <- DropNA(full)
countdf
meta
full
```


```{r}
#design GLM using lmFit and eBayes
#Algorithm provided by and adapted from Christian Diener, PhD:
###########################################################################################################
design <- model.matrix(~bowel + sex + age + BMI_CALC + eGFR, full)
dge <- DGEList(counts=count_matrix)  # where `count_matrix` is the matrix mentioned above
logCPM <- cpm(dge, log=TRUE)  # takes the log of the data
fit <- lmFit(logCPM, design)  # fits the model for all labs
fit <- eBayes(fit)  # stabilizes the variances
###########################################################################################################
```


```{r}
#Pre-process for plotting:

#Get results table for Constipation coefficient relative to High Normal BMF:
re_const <- topTable(fit, coef = 2, genelist = countdf$name, sort="p", number="none")  # Select the significant models by coefficient 2
re_low <- topTable(fit, coef = 3, genelist = countdf$name, sort="p", number="none")  # Select the significant models by coefficient 3
re_diarrhea <- topTable(fit, coef = 4, genelist = countdf$name, sort="p", number="none")  # Select the significant models by coefficient 4

indices <- match(rownames(re_const), rownames(meta)) # associate column of labs names with index of lab
re_const[1] <- countdf$name[indices] # associate the labs names with the labs indices
p_const <- re_const[re_const$adj.P.Val < 0.05,] # create df of just significant adj P value results
p_const <- p_const[order(p_const$adj.P.Val),] # order by adj P value
p_const <- p_const[,c('logFC','B','adj.P.Val','ID','P.Value')] # keep only desired columns

#Get results table for Low Normal coefficient relative to High Normal BMF:
indices <- match(rownames(re_low), rownames(meta)) # associate column of labs names with index of lab
re_low[1] <- countdf$name[indices] # associate the labs names with the labs indices
p_low <- re_low[re_low$adj.P.Val < 0.05,] # create df of just significant adj P value results
p_low <- p_low[order(p_low$adj.P.Val),] # order by adj P value
p_low <- p_low[,c('logFC','B','adj.P.Val','ID','P.Value')] # keep only desired columns

#Get results table for Diarrhea coefficient relative to High Normal BMF:
indices <- match(rownames(re_diarrhea), rownames(meta)) # associate column of labs names with index of lab
re_diarrhea[1] <- countdf$name[indices] # associate the labs names with the labs indices
p_diarrhea <- re_diarrhea[re_diarrhea$adj.P.Val < 0.05,] # create df of just significant adj P value results
p_diarrhea <- p_diarrhea[order(p_diarrhea$adj.P.Val),] # order by adj P value
p_diarrhea <- p_diarrhea[,c('logFC','B','adj.P.Val','ID','P.Value')] # keep only desired columns



#Show dfs of significant hits (there are none)
sig_const <- p_const[which(p_const$adj.P.Val < 0.05),]
sig_low <- p_low[which(p_low$adj.P.Val < 0.05),]
sig_diarrhea <- p_diarrhea[which(p_diarrhea$adj.P.Val < 0.05),]
sig_low$bowel <- 'Low Normal'
sig_low
sig_diarrhea # no hits
sig_const # no hits
```



```{r}
#Main Text Figures
comparisons = list(c("Constipation","High Normal"),c("Low Normal","High Normal"),c("Diarrhea","High Normal"))
titles = list(c('OMEGA-6/OMEGA-3 RATIO'),c('EPA'),c('HOMOCYSTEINE, SERUM'),c('EOSINOPHILS'), c('BASOPHILS'))

myplots_main <- list()  # new empty list
#annotated only
labs_testanno <- cbind(full[,1],full[,6],as.data.frame(clr(full[,7:ncol(full)][,as.numeric(rownames(sig_low))])))
names(labs_testanno)[1] <- 'public_client_id'
names(labs_testanno)[2] <- 'bowel'
labs_testanno$bowel <- factor(labs_testanno$bowel, levels=c(1,2,3,4), labels = c("Constipation","Low Normal","High Normal","Diarrhea"))
labs_testanno$bowel <- factor(labs_testanno$bowel, levels=c("Constipation","Low Normal","High Normal","Diarrhea"), labels = c("Constipation","Low Normal","High Normal","Diarrhea"))

#Plotting:
#######################################################################
counter<<-1

# Annotation function:
sig = function(x){
  if(x < 0.001){"***"} 
  else if(x < 0.01){"**"}
  else if(x < 0.05){"*"}
  else{NA}}

# Special Graphing Function:
########################################
test = function(x,y,z,j) {
  x = NULL
  y = NULL
  
  temp<- names(labs_testanno)[[j+2]]
  temp_size <- merge(labs_testanno[,c(1,2,j+2)],labs_testanno[,c('public_client_id','bowel')])
  temp_size <- temp_size[,c('bowel',temp)]
  temp_size <- temp_size[which(!is.na(temp_size)),]
  temp_size <- length(levels(factor(temp_size$bowel)))
  
  if (counter==1) {
    results = ifelse(nrow(sig_low[which(sig_low['bowel'] == 'Constipation'),]['adj.P.Val'])!=0,
           list(p.value = sig_low[which(sig_low['bowel'] == 'Constipation'),]['adj.P.Val'][[1]]),
           list(p.value = 1))
  }
  else if (counter==2) {
    results = ifelse(nrow(sig_low[which(sig_low['bowel'] == 'Low Normal'),]['adj.P.Val'])!=0,
           list(p.value = sig_low[which(sig_low['bowel'] == 'Low Normal'),]['adj.P.Val'][[1]]),
           list(p.value = 1))
  }
  else {
     results = ifelse(nrow(sig_low[which(sig_low['bowel'] == 'Diarrhea'),]['adj.P.Val'])!=0,
           list(p.value = sig_low[which(sig_low['bowel'] == 'Diarrhea'),]['adj.P.Val'][[1]]),
           list(p.value = 1))
  }
  
  if (temp_size == 2) { #if the # of levels to the omic is 2 categories (incl High Normal)
    counter<<-1
    }
  
  if (temp_size == 3) { #if the # of levels to the omic is 3 categories (incl High Normal)
    counter<<-counter+1
    if (counter > 2) {counter<<-1}
  }
  
  if (temp_size == 4) { #if the # of levels to the omic is all 4 categories (incl High Normal)
    counter<<-counter+1
    if (counter > 3) {counter<<-1}
  }
  
  names(results) <- 'p.value'
  return(results)
}
##########################################

#for every significant hit, plot the graph for it and annotate it properly using the above function:
for (ind in 1:nrow(sig_low)) {
  y_name <- paste(names(labs_testanno)[c(ind+2)],sep="")
  myplots_main[[ind]] <- local({
    plotlim_loweranno = min(labs_testanno[[y_name]])
    plotlim_upperanno = max(labs_testanno[[y_name]])
    plotlim_baranno = plotlim_loweranno - 3.5
    plotlim_marginanno = plotlim_loweranno - 5
    labs_testanno <- labs_testanno[,c('bowel','public_client_id',y_name)]
    plt_anno <- ggplot(data = labs_testanno, aes(x = bowel, y = .data[[y_name]], group = bowel)) +
    scale_x_discrete(guide = guide_axis(n.dodge = 2))+
    geom_jitter(aes(color = bowel),  size = 0.1, cex = 0.05) +
    geom_boxplot(alpha=0.0,outlier.shape = NA) +
    theme(text = element_text(size = 9)) +
    ggtitle(label = str_wrap(titles[[ind]], width = 2)) +
    geom_signif(comparisons = comparisons, map_signif_level = sig, test = 'test', test.args = list(z = comparisons, j = ind), 
                y_position = plotlim_baranno, 
                step_increase = 0.15,  size = 0.5, 
                textsize = 1.5,
                tip_length = c(0,0)) +
    coord_cartesian(ylim=c(plotlim_loweranno,plotlim_upperanno),clip="off")+
    labs(color = "BMF Category", y = ifelse(ind == 1, "Log-Transformed\n Clinical Labs Level",""))+
    guides(colour = guide_legend(override.aes = list(size=7), title.position = 'left', nrow = 1, ncol = 4)) +
      
      
    theme(plot.margin = unit(c(0,0,abs(plotlim_marginanno),0), "cm"),
          legend.title = element_text(size=10), 
          legend.text = element_text(size=7),
          axis.text.x = element_blank(), 
          axis.title.x = element_blank()) +
      
    scale_fill_manual(limits = c("Constipation","Low Normal","High Normal","Diarrhea"), labels = c("Constipation","Low Normal","High Normal","Diarrhea"), values = colors(),
                    drop = FALSE)
  })
}

#store the figures to save them:
counter <<- 1
figure_main <- ggarrange(plotlist = myplots_main[1:length(myplots_main)], labels = LETTERS[1:5], legend = "top", align = "hv", common.legend = TRUE, nrow = 1, ncol = 5)

#######################################################################
figure_main

#save the figures:
ggsave(
  "BMFvsLabsMain.png",
  plot = figure_main,
  device = NULL,
  path = NULL,
  scale = 1.2,
  width = NA,
  height = NA,
  units = c("in", "cm", "mm", "px"),
  dpi = 300,
  limitsize = TRUE,
  bg = NULL
)
```



Add a new chunk by clicking the *Insert Chunk* button on the toolbar or by pressing *Ctrl+Alt+I*.

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the *Preview* button or press *Ctrl+Shift+K* to preview the HTML file).

The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike *Knit*, *Preview* does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.
